# Script setup.
# The global workspace is deliberately NOT cleared here: rm(list = ls()) would
# delete the caller's objects whenever this file is sourced from another
# script or an interactive session, and it does not reset loaded packages or
# options anyway. Run the script in a fresh R session for a clean state.

library(data.table)

# source function to standardize outcomes
source('./replication_hasz/code/icw_index_missing.R')

#### clean study 1 data
# Read the study 1 export and discard its first two rows
# (presumably extra header rows of the survey export -- confirm against the
# raw file), then lower-case every column name.
data_study1 <- fread('./replication_hasz/data/study1.csv')[-(1:2)]
setnames(data_study1, tolower(names(data_study1)))

# clean treatment vars
# t1/t2/t3 are 0/1 indicators for the three values of fl_12_do
# (Control, ExperimentalMatch, ExperimentMisMatch). `treatment` holds the same
# information as a single 1/2/3 code and is NA for any other value.
data_study1[, `:=`(
  t1 = as.numeric(fl_12_do == 'Block4-Control'),
  t2 = as.numeric(fl_12_do == 'Block4-ExperimentalMatch'),
  t3 = as.numeric(fl_12_do == 'Block4-ExperimentMisMatch')
)]
data_study1[, treatment := as.numeric(match(
  fl_12_do,
  c('Block4-Control', 'Block4-ExperimentalMatch', 'Block4-ExperimentMisMatch')
))]

# clean outcomes vars
# For every outcome: (1) map the survey answer to a number (any other answer
# stays NA), (2) keep that raw coding in a "<name>c" column, (3) overwrite the
# outcome with the output of matStand(), which receives the control-arm
# indicator as `sgroup` (standardization relative to the control group).
sgroup <- as.logical(data_study1[['t1']])

# -- vote_us: localvoteus on a 1-5 scale
data_study1[, vote_us := {
  likelihood <- c('Definitely would not' = 1,
                  'Probably would not' = 2,
                  'May or may not' = 3,
                  'Probably would' = 4,
                  'Definitely would' = 5)
  unname(likelihood[as.character(localvoteus)])
}]
data_study1[, vote_usc := vote_us]
data_study1[, vote_us := matStand(as.matrix(vote_us), sgroup = sgroup)]

# -- vote_own: localvoteyour on a 1-5 scale
data_study1[, vote_own := {
  likelihood <- c('Definitely would not' = 1,
                  'Probably would not' = 2,
                  'May or may not' = 3,
                  'Probably would' = 4,
                  'Definitely would' = 5)
  unname(likelihood[as.character(localvoteyour)])
}]
data_study1[, vote_ownc := vote_own]
data_study1[, vote_own := matStand(as.matrix(vote_own), sgroup = sgroup)]

# -- ballot: ballotfl on a 1-5 scale (answer labels are lower-case here)
data_study1[, ballot := {
  likelihood <- c('definitely would not' = 1,
                  'probably would not' = 2,
                  'may or may not' = 3,
                  'probably would' = 4,
                  'definitely would' = 5)
  unname(likelihood[as.character(ballotfl)])
}]
data_study1[, ballotc := ballot]
data_study1[, ballot := matStand(as.matrix(ballot), sgroup = sgroup)]

# -- more_info: behavioralout as 0/1
data_study1[, more_info := {
  redirect <- c('No, finish and submit my answers to this survey.' = 0,
                'Yes, redirect me to a website to learn more, and submit my answers to this survey.' = 1)
  unname(redirect[as.character(behavioralout)])
}]
data_study1[, more_infoc := more_info]
data_study1[, more_info := matStand(as.matrix(more_info), sgroup = sgroup)]

#clean covariates
# Binary covariates: 1 when the raw answer matches the listed label(s), 0
# otherwise, and NA when the raw answer is the empty string.
# Ordinal covariates: mapped to 1-5; any other answer stays NA.

# education
data_study1[, college := as.numeric(
  education == "Bachelor's degree (for example: BA, BS)" |
    education == "Associate's degree (for example: AA, AS)" |
    education == "Post graduate degree (for example: Master's degree, professional degree beyond a bachelor's, doctorate degree)"
)]
data_study1[education == "", college := NA]

# demographics
data_study1[, male := as.numeric(sex == "Male")]
data_study1[sex == "", male := NA]
data_study1[, white := as.numeric(race == 'White')]
data_study1[race == "", white := NA]
data_study1[, latino := as.numeric(hispanic == "Yes")]
data_study1[hispanic == "", latino := NA]

# party identification
data_study1[, republican := as.numeric(partyid == "Republican")]
data_study1[partyid == "", republican := NA]
data_study1[, independent := as.numeric(partyid == "Independent")]
data_study1[partyid == "", independent := NA]
data_study1[, democrat := as.numeric(partyid == "Democrat")]
data_study1[partyid == "", democrat := NA]

# partisan strength; the 0 assignment runs second, so it wins if a row
# satisfies both conditions
# NOTE(review): "Neither party" is coded as strong_partisan = 1 -- confirm
# this is the intended coding.
data_study1[
  partystrengthdr == "Strong ${q://QID13/ChoiceGroup/SelectedChoices}" |
    partystrenghi == "Neither party",
  strong_partisan := 1
]
data_study1[
  partystrengthdr == "Not very strong ${q://QID13/ChoiceGroup/SelectedChoices}" |
    partystrenghi %in% c("Closer to Republican party", "Closer to Democratic party"),
  strong_partisan := 0
]

# employment and household
data_study1[, work := as.numeric(employment == "Full time" | employment == "Part time")]
data_study1[employment == "", work := NA]
data_study1[, w_partner := as.numeric(married == "Married" | married == "Domestic Partnership")]
data_study1[married == "", w_partner := NA]

# attitude toward immigration levels, 1 (decreased a lot) to 5 (increased a lot)
data_study1[, att_mig := {
  mig_scale <- c("Decreased a lot" = 1,
                 "Decreased a little" = 2,
                 "Left the same as it is now" = 3,
                 "Increased a little" = 4,
                 "Increased a lot" = 5)
  unname(mig_scale[as.character(immigrationatt)])
}]

# nativity
data_study1[, native_born := as.numeric(bornus == 'Yes')]
data_study1[bornus == "", native_born := NA]

# interest in local politics, 1 (not at all) to 5 (very)
data_study1[, interest_politics := {
  interest_scale <- c("Not interested at all" = 1,
                      "Not very interested" = 2,
                      "In between" = 3,
                      "Somewhat interested" = 4,
                      "Very interested" = 5)
  unname(interest_scale[as.character(localinterest)])
}]


#### clean study 2 data
# Study 2 is read with read.csv and converted to a data.table; unlike study 1,
# no rows are dropped and column names are left as read.csv produces them.
data_study2 <- data.table(read.csv('./replication_hasz/data/study2.csv'))

# clean treatment var
# t1/t2/t3 are 0/1 indicators for the three values of fl_12_do; `treatment`
# is the matching 1/2/3 code, NA for any other value.
data_study2[, `:=`(
  t1 = as.numeric(fl_12_do == 'Block4-Control'),
  t2 = as.numeric(fl_12_do == 'Block4-ExperimentalMatch'),
  t3 = as.numeric(fl_12_do == 'Block4-ExperimentMisMatch')
)]
data_study2[, treatment := as.numeric(match(
  fl_12_do,
  c('Block4-Control', 'Block4-ExperimentalMatch', 'Block4-ExperimentMisMatch')
))]

# clean outcomes vars and standardize relative to control group
# For every outcome: (1) map the survey answer to a number (any other answer
# stays NA), (2) keep that raw coding in a "<name>c" column, (3) overwrite the
# outcome with the output of matStand(), which receives the study 2
# control-arm indicator as `sgroup`.
sgroup <- as.logical(data_study2[['t1']])

# -- vote_us: localvoteus on a 1-5 scale
data_study2[, vote_us := {
  likelihood <- c('Definitely would not' = 1,
                  'Probably would not' = 2,
                  'May or may not' = 3,
                  'Probably would' = 4,
                  'Definitely would' = 5)
  unname(likelihood[as.character(localvoteus)])
}]
data_study2[, vote_usc := vote_us]
data_study2[, vote_us := matStand(as.matrix(vote_us), sgroup = sgroup)]

# -- vote_own: localvoteyour on a 1-5 scale
data_study2[, vote_own := {
  likelihood <- c('Definitely would not' = 1,
                  'Probably would not' = 2,
                  'May or may not' = 3,
                  'Probably would' = 4,
                  'Definitely would' = 5)
  unname(likelihood[as.character(localvoteyour)])
}]
data_study2[, vote_ownc := vote_own]
data_study2[, vote_own := matStand(as.matrix(vote_own), sgroup = sgroup)]

# -- ballot: ballotfl on a 1-5 scale (answer labels are lower-case here)
data_study2[, ballot := {
  likelihood <- c('definitely would not' = 1,
                  'probably would not' = 2,
                  'may or may not' = 3,
                  'probably would' = 4,
                  'definitely would' = 5)
  unname(likelihood[as.character(ballotfl)])
}]
data_study2[, ballotc := ballot]
data_study2[, ballot := matStand(as.matrix(ballot), sgroup = sgroup)]

# -- more_info: behavioralout as 0/1 (short 'No'/'Yes' labels in this study)
data_study2[, more_info := {
  redirect <- c('No' = 0, 'Yes' = 1)
  unname(redirect[as.character(behavioralout)])
}]
data_study2[, more_infoc := more_info]
data_study2[, more_info := matStand(as.matrix(more_info), sgroup = sgroup)]

# clean covariates
# Binary covariates: 1 when the raw answer matches the listed label(s), 0
# otherwise, and NA when the raw answer is the empty string (the party
# indicators below have no such blank-to-NA step).
# Ordinal covariates: mapped to 1-5; any other answer stays NA.

# education
data_study2[, college := as.numeric(
  education == "Bachelor's degree (for example: BA, BS)" |
    education == "Associate's degree (for example: AA, AS)" |
    education == "Post graduate degree (for example: Master's degree, professional degree beyond a bachelor's, doctorate degree)"
)]
data_study2[education == "", college := NA]

# demographics
data_study2[, male := as.numeric(sex == "Male")]
data_study2[sex == "", male := NA]
data_study2[, white := as.numeric(race == 'White')]
data_study2[race == "", white := NA]
data_study2[, latino := as.numeric(hispanic == "Yes")]
data_study2[hispanic == "", latino := NA]

# party identification: identifiers and leaners are grouped together
data_study2[, republican := as.numeric(
  `qualifying..partyid` == "Republican" |
    `qualify.ilean` == "Closer to Republican party"
)]
data_study2[, democrat := as.numeric(
  `qualifying..partyid` == "Democrat" |
    `qualify.ilean` == "Closer to Democratic party"
)]

# partisan strength; the 0 assignment runs second, so it wins if a row
# satisfies both conditions
data_study2[
  partystrengthdr == "Strong ${q://QID13/ChoiceGroup/SelectedChoices}",
  strong_partisan := 1
]
data_study2[
  `qualifying..partyid` == "Independent" |
    partystrengthdr == "Not very strong ${q://QID13/ChoiceGroup/SelectedChoices}",
  strong_partisan := 0
]

# employment and household
data_study2[, work := as.numeric(employment == "Full time" | employment == "Part time")]
data_study2[employment == "", work := NA]
data_study2[, w_partner := as.numeric(married == "Married" | married == "Domestic Partnership")]
data_study2[married == "", w_partner := NA]

# attitude toward immigration levels, 1 (decreased a lot) to 5 (increased a lot)
data_study2[, att_mig := {
  mig_scale <- c("Decreased a lot" = 1,
                 "Decreased a little" = 2,
                 "Left the same as it is now" = 3,
                 "Increased a little" = 4,
                 "Increased a lot" = 5)
  unname(mig_scale[as.character(immigrationatt)])
}]

# nativity
data_study2[, native_born := as.numeric(bornus == 'Yes')]
data_study2[bornus == "", native_born := NA]

# interest in local politics, 1 (not at all) to 5 (very)
data_study2[, interest_politics := {
  interest_scale <- c("Not interested at all" = 1,
                      "Not very interested" = 2,
                      "In between" = 3,
                      "Somewhat interested" = 4,
                      "Very interested" = 5)
  unname(interest_scale[as.character(localinterest)])
}]

### pool data for mean plots by study
# Stack both studies (columns present in only one study are filled with NA)
# and keep only rows with an assigned treatment arm. The stack is built once
# and deep-copied, instead of repeating the identical rbind + filter.
data_pooled <- rbind(data_study1, data_study2, fill = TRUE)
data_pooled <- data_pooled[!is.na(treatment)]
data_pooled2 <- copy(data_pooled)

# Per-study copy: the study label is derived from the response start date
# (first 10 characters of startdate, read as YYYY-MM-DD).
# NOTE(review): this assumes every study 1 response started before 2022-04-18
# and every study 2 response on or after it; rows with an unparseable
# startdate get study = NA -- confirm against the raw data.
data_pooled[, study := ifelse(
  as.Date(substr(startdate, 1, 10), format = '%Y-%m-%d') < as.Date('2022-04-18'),
  'Florida Study',
  'U.S. Study'
)]

# Pooled copy: the same rows again, all labelled 'Pooled'.
data_pooled2[, study := 'Pooled']

# Every respondent therefore appears twice in `data`: once under their own
# study and once under 'Pooled'.
data <- rbind(data_pooled, data_pooled2)
data[, study := factor(study, levels = c('Pooled', 'Florida Study', 'U.S. Study'))]

# Make sure the output directory exists; saveRDS() does not create it and
# would otherwise fail on a fresh checkout.
dir.create('./replication_hasz/output/data', recursive = TRUE, showWarnings = FALSE)
saveRDS(data, './replication_hasz/output/data/data_pooled.rds')

